from urllib import request
import re

# Script: fetch listing pages 1 and 2 of the xicidaili "nn" proxy table and
# write the cell text of every table row to xici.csv, one row per line.
base_url = 'http://www.xicidaili.com/nn/%d'
# A browser-like User-Agent is sent with every request
# (presumably the site rejects urllib's default one -- TODO confirm).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
}

# Patterns are loop-invariant, so compile them once up front.
# tr_pat uses re.S so a row may span several lines of markup.
tr_pat = re.compile(r'<tr.+?>.+?</tr>', re.S)
# td_pat only matches attribute-less <td> cells whose content sits on a
# single line (no re.S); cells such as <td class="..."> are skipped.
td_pat = re.compile(r'<td>(.+?)</td>')

# The context manager guarantees the CSV is flushed and closed even if a
# request, decode or write raises part-way through.
with open('xici.csv', 'w', encoding='utf-8') as f:
    for i in range(1, 3):
        fullurl = base_url % i
        print(fullurl)
        req = request.Request(fullurl, headers=headers)
        # Close the HTTP response as soon as the body has been read.
        with request.urlopen(req) as response:
            html = response.read().decode('utf-8')

        tr_list = tr_pat.findall(html)
        # The first matched row is skipped (presumably the table header).
        for tr in tr_list[1:]:
            td_list = td_pat.findall(tr)
            # Cells are joined verbatim; no CSV quoting/escaping is applied.
            f.write(','.join(td_list) + '\n')